
* ---------------------------------------------------------------------------
* Session setup and data import.
* Expects the global direc_current (project root folder) to be defined by the
* caller before this do-file runs; the input CSV is read from its
* regression_sample subfolder.
* ---------------------------------------------------------------------------
clear

* The macro reset below erases every global, including direc_current, which
* the sample path depends on. Park its value in a string scalar (scalars
* survive the reset) and restore it right afterwards.
scalar direc_current_keep = `"${direc_current}"'
macro drop _all
global direc_current = scalar(direc_current_keep)
scalar drop direc_current_keep

set more off
pause on

* Stop early with a readable message instead of importing from a path that
* starts at the filesystem root.
if `"${direc_current}"' == "" {
    display as error "global direc_current is not set; define the project root before running this do-file"
    exit 198
}

global direc_sample "${direc_current}/regression_sample"

* import data
import delimited "${direc_sample}/formC_restricted_sample_line_no1_policystateyear_level.csv", clear

* Collapse to one row per insurer (cocode) - year - state by keeping the
* first row of each cocode/year/state_id group.
bysort cocode year state_id: keep if _n == 1

* stay_is = 1 when the insurer-state pair has no row with sales_ist equal
* to 0, and 0 otherwise. Computed before inactive rows are removed below.
bysort state_id cocode: egen stay_is = total(sales_ist == 0)
replace stay_is = !stay_is

* Keep only rows where the insurer offers plans (nplans_ist different from 0).
keep if nplans_ist != 0

* Number of remaining rows per insurer-year; with one row per
* insurer-state-year this counts the states left for that insurer-year.
bysort cocode year: gen nstate_it = _N

* Summary statistics split by major_ist, exported as a LaTeX table.

* Variables reported in the table, in row order.
local sumvars prem_ist age_ist i_worse_claims_ist claims_ist_line7_all_plans nplans_ist share_ist nstate_it

* sum1 holds the statistics for major_ist==1, sum2 for major_ist==0.
eststo clear
eststo sum1: quietly estpost summarize `sumvars' if major_ist == 1
eststo sum2: quietly estpost summarize `sumvars' if major_ist == 0

* Variable labels (esttab is called with the label option below).
label variable prem_ist "Annual premium"
label variable age_ist "Plan age"
label variable claims_ist_line7_all_plans "Per-enrollee annual claims"
label variable nplans_ist "Plans offered"
label variable share_ist "Insurer share of total sales"
label variable nstate_it "States where the insurer is active"
label variable i_worse_claims_ist "Have higher than anticipated claims"

* Write the table to sum_data_ist.tex in the working directory: means in the
* main cells, standard deviations as the auxiliary statistic, two columns
* per group (wide), with a hand-written LaTeX header and footer.
esttab sum1 sum2 using "sum_data_ist.tex", ///
    main(mean) aux(sd) nostar wide label nonumber replace ///
    prehead( ///
        `"\begin{table}[t]"' ///
        `"\begin{center}"' ///
        `"\caption{Major vs. fringe firms \label{table.sum.data.ist}}"' ///
        `"\begin{tabular}{lcccc} \hline"' ///
        `" & \multicolumn{2}{c}{(1)} &  \multicolumn{2}{c}{(2)}  \\"' ///
        `"  &  \multicolumn{2}{c}{Major firms} &  \multicolumn{2}{c}{Fringe firms} \\"' ///
    ) ///
    postfoot( ///
        `"\hline"' ///
        `"\end{tabular} "' ///
        `"\end{center}"' ///
        `" \small \emph{Notes:} Data = Form C NAIC reports 2000-2007. The sample is at the insurer-state-year level. The sample consists of insurers that have strictly positive sales  in a given state-year combination. We classify a firm as major if its sales account for at least 5\% of the total market sales; otherwise, the firm is classified as a fringe. The table reports the means with standard deviations in parentheses."' ///
        `"\end{table}"' ///
    )


